In [ ]:
import numpy as np
import pandas as pd
In [ ]:
mydataset = {
  'cars': ["BMW", "Volvo", "Ford"],
  'passings': [3, 7, 2]
}

myvar = pd.DataFrame(mydataset)

print(myvar)
    cars  passings
0    BMW         3
1  Volvo         7
2   Ford         2

Iris Data Set¶

In [ ]:
url = "Iris.csv"
df = pd.read_csv(url)
print(df)
      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
0      1            5.1           3.5            1.4           0.2   
1      2            4.9           3.0            1.4           0.2   
2      3            4.7           3.2            1.3           0.2   
3      4            4.6           3.1            1.5           0.2   
4      5            5.0           3.6            1.4           0.2   
..   ...            ...           ...            ...           ...   
145  146            6.7           3.0            5.2           2.3   
146  147            6.3           2.5            5.0           1.9   
147  148            6.5           3.0            5.2           2.0   
148  149            6.2           3.4            5.4           2.3   
149  150            5.9           3.0            5.1           1.8   

            Species  
0       Iris-setosa  
1       Iris-setosa  
2       Iris-setosa  
3       Iris-setosa  
4       Iris-setosa  
..              ...  
145  Iris-virginica  
146  Iris-virginica  
147  Iris-virginica  
148  Iris-virginica  
149  Iris-virginica  

[150 rows x 6 columns]

Head¶

In [ ]:
df.head(3)
Out[ ]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa

Tail¶

In [ ]:
df.tail(6)
Out[ ]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
144 145 6.7 3.3 5.7 2.5 Iris-virginica
145 146 6.7 3.0 5.2 2.3 Iris-virginica
146 147 6.3 2.5 5.0 1.9 Iris-virginica
147 148 6.5 3.0 5.2 2.0 Iris-virginica
148 149 6.2 3.4 5.4 2.3 Iris-virginica
149 150 5.9 3.0 5.1 1.8 Iris-virginica

Info¶

In [ ]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB

Describe¶

In [ ]:
df.describe()
Out[ ]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
count 150.000000 150.000000 150.000000 150.000000 150.000000
mean 75.500000 5.843333 3.054000 3.758667 1.198667
std 43.445368 0.828066 0.433594 1.764420 0.763161
min 1.000000 4.300000 2.000000 1.000000 0.100000
25% 38.250000 5.100000 2.800000 1.600000 0.300000
50% 75.500000 5.800000 3.000000 4.350000 1.300000
75% 112.750000 6.400000 3.300000 5.100000 1.800000
max 150.000000 7.900000 4.400000 6.900000 2.500000
In [ ]:
df.columns
Out[ ]:
Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

Indexing and Selecting¶

In [ ]:
Sepla = df["SepalLengthCm"]
print(Sepla.head())
0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: SepalLengthCm, dtype: float64
In [ ]:
url = "Iris.csv"
df = pd.read_csv(url,index_col="Id")
Sepla = df[["SepalLengthCm","SepalWidthCm","PetalLengthCm"]]
print(Sepla.head())
    SepalLengthCm  SepalWidthCm  PetalLengthCm
Id                                            
1             5.1           3.5            1.4
2             4.9           3.0            1.4
3             4.7           3.2            1.3
4             4.6           3.1            1.5
5             5.0           3.6            1.4
In [ ]:
gk = df.groupby("Species")
gk.first()
Out[ ]:
SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
Species
Iris-setosa 5.1 3.5 1.4 0.2
Iris-versicolor 7.0 3.2 4.7 1.4
Iris-virginica 6.3 3.3 6.0 2.5
In [ ]:
df.aggregate(['sum','min'])
Out[ ]:
SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
sum 876.5 458.1 563.8 179.8 Iris-setosaIris-setosaIris-setosaIris-setosaIr...
min 4.3 2.0 1.0 0.1 Iris-setosa
In [ ]:
df.iloc[[0, 1]]
Out[ ]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
In [ ]:
df.loc[[0,1,2,3,4,5,]]
Out[ ]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa
5 6 5.4 3.9 1.7 0.4 Iris-setosa

IndiaCrime¶

In [ ]:
ind = pd.read_csv("IndiaCrime.csv")
In [ ]:
ind.head(15)
Out[ ]:
Area_Name Year Group_Name Sub_Group_Name Cases_Property_Recovered Cases_Property_Stolen Value_of_Property_Recovered Value_of_Property_Stolen
0 Andaman & Nicobar Islands 2001 Burglary - Property 3. Burglary 27 64 755858 1321961
1 Andhra Pradesh 2001 Burglary - Property 3. Burglary 3321 7134 51483437 147019348
2 Arunachal Pradesh 2001 Burglary - Property 3. Burglary 66 248 825115 4931904
3 Assam 2001 Burglary - Property 3. Burglary 539 2423 3722850 21466955
4 Bihar 2001 Burglary - Property 3. Burglary 367 3231 2327135 17023937
5 Chandigarh 2001 Burglary - Property 3. Burglary 119 364 1804823 10217378
6 Chhattisgarh 2001 Burglary - Property 3. Burglary 1169 4144 6518261 30457033
7 Dadra & Nagar Haveli 2001 Burglary - Property 3. Burglary 10 34 247140 1333389
8 Daman & Diu 2001 Burglary - Property 3. Burglary 7 43 479300 2084845
9 Delhi 2001 Burglary - Property 3. Burglary 642 3029 39632177 150033824
10 Goa 2001 Burglary - Property 3. Burglary 65 347 895875 9991574
11 Gujarat 2001 Burglary - Property 3. Burglary 1124 4928 20821285 141650158
12 Haryana 2001 Burglary - Property 3. Burglary 1257 3098 27369980 64576543
13 Himachal Pradesh 2001 Burglary - Property 3. Burglary 93 812 2936881 17092490
14 Jammu & Kashmir 2001 Burglary - Property 3. Burglary 161 1345 2905184 27222163
In [ ]:
ind.tail(10)
Out[ ]:
Area_Name Year Group_Name Sub_Group_Name Cases_Property_Recovered Cases_Property_Stolen Value_of_Property_Recovered Value_of_Property_Stolen
2439 Odisha 2010 Total Property 7. Total Property Stolen & Recovered 5690 12691 311033656 1116660883
2440 Puducherry 2010 Total Property 7. Total Property Stolen & Recovered 325 625 18752582 30249484
2441 Punjab 2010 Total Property 7. Total Property Stolen & Recovered 5885 9873 646232099 1056728815
2442 Rajasthan 2010 Total Property 7. Total Property Stolen & Recovered 8551 28152 854388626 1395764020
2443 Sikkim 2010 Total Property 7. Total Property Stolen & Recovered 38 134 1444190 9445146
2444 Tamil Nadu 2010 Total Property 7. Total Property Stolen & Recovered 16125 21509 660311804 1317919190
2445 Tripura 2010 Total Property 7. Total Property Stolen & Recovered 192 879 5666102 33032746
2446 Uttar Pradesh 2010 Total Property 7. Total Property Stolen & Recovered 9130 35068 577591772 1442670414
2447 Uttarakhand 2010 Total Property 7. Total Property Stolen & Recovered 964 2234 47135685 123398840
2448 West Bengal 2010 Total Property 7. Total Property Stolen & Recovered 4548 23759 1168242161 5015168687
In [ ]:
ind.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2449 entries, 0 to 2448
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Area_Name                    2449 non-null   object
 1   Year                         2449 non-null   int64 
 2   Group_Name                   2449 non-null   object
 3   Sub_Group_Name               2449 non-null   object
 4   Cases_Property_Recovered     2449 non-null   int64 
 5   Cases_Property_Stolen        2449 non-null   int64 
 6   Value_of_Property_Recovered  2449 non-null   int64 
 7   Value_of_Property_Stolen     2449 non-null   int64 
dtypes: int64(5), object(3)
memory usage: 153.2+ KB
In [ ]:
ind.describe()
Out[ ]:
Year Cases_Property_Recovered Cases_Property_Stolen Value_of_Property_Recovered Value_of_Property_Stolen
count 2449.000000 2449.000000 2449.000000 2.449000e+03 2.449000e+03
mean 2005.499388 1232.892201 3419.420988 5.859924e+07 2.465431e+08
std 2.873294 3079.573907 8136.256820 2.494403e+08 9.670035e+08
min 2001.000000 0.000000 0.000000 0.000000e+00 0.000000e+00
25% 2003.000000 13.000000 45.000000 6.845700e+05 3.649018e+06
50% 2005.000000 113.000000 358.000000 5.851830e+06 2.701800e+07
75% 2008.000000 722.000000 1875.000000 3.406395e+07 1.214580e+08
max 2010.000000 27960.000000 80663.000000 7.470011e+09 2.377625e+10
In [ ]:
ind.shape
Out[ ]:
(2449, 8)
In [ ]:
ind.columns
Out[ ]:
Index(['Area_Name', 'Year', 'Group_Name', 'Sub_Group_Name',
       'Cases_Property_Recovered', 'Cases_Property_Stolen',
       'Value_of_Property_Recovered', 'Value_of_Property_Stolen'],
      dtype='object')
In [ ]:
ind = pd.read_csv("IndiaCrime.csv",index_col=["Area_Name","Value_of_Property_Stolen"])
ind.head()
Out[ ]:
Year Group_Name Sub_Group_Name Cases_Property_Recovered Cases_Property_Stolen Value_of_Property_Recovered
Area_Name Value_of_Property_Stolen
Andaman & Nicobar Islands 1321961 2001 Burglary - Property 3. Burglary 27 64 755858
Andhra Pradesh 147019348 2001 Burglary - Property 3. Burglary 3321 7134 51483437
Arunachal Pradesh 4931904 2001 Burglary - Property 3. Burglary 66 248 825115
Assam 21466955 2001 Burglary - Property 3. Burglary 539 2423 3722850
Bihar 17023937 2001 Burglary - Property 3. Burglary 367 3231 2327135
In [ ]:
ind = pd.read_csv("IndiaCrime.csv",index_col=["Sub_Group_Name","Value_of_Property_Recovered","Cases_Property_Recovered"])
ind.head()
Out[ ]:
Area_Name Year Group_Name Cases_Property_Stolen Value_of_Property_Stolen
Sub_Group_Name Value_of_Property_Recovered Cases_Property_Recovered
3. Burglary 755858 27 Andaman & Nicobar Islands 2001 Burglary - Property 64 1321961
51483437 3321 Andhra Pradesh 2001 Burglary - Property 7134 147019348
825115 66 Arunachal Pradesh 2001 Burglary - Property 248 4931904
3722850 539 Assam 2001 Burglary - Property 2423 21466955
2327135 367 Bihar 2001 Burglary - Property 3231 17023937
In [ ]:
ind.dropna()
Out[ ]:
Area_Name Year Group_Name Sub_Group_Name Cases_Property_Recovered Cases_Property_Stolen Value_of_Property_Recovered
Value_of_Property_Stolen
1321961 Andaman & Nicobar Islands 2001 Burglary - Property 3. Burglary 27 64 755858
147019348 Andhra Pradesh 2001 Burglary - Property 3. Burglary 3321 7134 51483437
4931904 Arunachal Pradesh 2001 Burglary - Property 3. Burglary 66 248 825115
21466955 Assam 2001 Burglary - Property 3. Burglary 539 2423 3722850
17023937 Bihar 2001 Burglary - Property 3. Burglary 367 3231 2327135
... ... ... ... ... ... ... ...
1317919190 Tamil Nadu 2010 Total Property 7. Total Property Stolen & Recovered 16125 21509 660311804
33032746 Tripura 2010 Total Property 7. Total Property Stolen & Recovered 192 879 5666102
1442670414 Uttar Pradesh 2010 Total Property 7. Total Property Stolen & Recovered 9130 35068 577591772
123398840 Uttarakhand 2010 Total Property 7. Total Property Stolen & Recovered 964 2234 47135685
5015168687 West Bengal 2010 Total Property 7. Total Property Stolen & Recovered 4548 23759 1168242161

2449 rows × 7 columns

In [ ]:
ind.aggregate(['sum','min'])
Out[ ]:
Area_Name Year Group_Name Cases_Property_Stolen Value_of_Property_Stolen
sum Andaman & Nicobar IslandsAndhra PradeshArunach... 4911468 Burglary - PropertyBurglary - PropertyBurglary... 8374162 603784038161
min Andaman & Nicobar Islands 2001 Burglary - Property 0 0
In [ ]:
gk = ind.groupby("Group_Name")
gk.first()
Out[ ]:
Area_Name Year Cases_Property_Stolen Value_of_Property_Stolen
Group_Name
Burglary - Property Andaman & Nicobar Islands 2001 64 1321961
Criminal Breach of Trust - Property Andaman & Nicobar Islands 2001 10 1226967
Dacoity -Property Andaman & Nicobar Islands 2001 0 0
Other heads of Property Andaman & Nicobar Islands 2001 0 0
Robbery - Property Andaman & Nicobar Islands 2001 4 40000
Theft - Property Andaman & Nicobar Islands 2001 65 595549
Total Property Andaman & Nicobar Islands 2001 143 3184477
In [ ]:
url = "Iris.csv"
df = pd.read_csv(url)
df.plot()
Out[ ]:
<Axes: >
In [ ]:
df.columns
Out[ ]:
Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')
In [ ]:
df.plot(x="SepalLengthCm",y="SepalWidthCm")
df.plot(x="SepalLengthCm",y="SepalWidthCm",kind="scatter")
Out[ ]:
<Axes: xlabel='SepalLengthCm', ylabel='SepalWidthCm'>
In [ ]:
df.plot(kind = 'bar',subplots=True,figsize=(8,8))
Out[ ]:
array([<Axes: title={'center': 'Id'}>,
       <Axes: title={'center': 'SepalLengthCm'}>,
       <Axes: title={'center': 'SepalWidthCm'}>,
       <Axes: title={'center': 'PetalLengthCm'}>,
       <Axes: title={'center': 'PetalWidthCm'}>], dtype=object)
In [ ]:
df.plot(kind='bar')
Out[ ]:
<Axes: >
In [ ]:
df.plot(kind ='barh',stacked=True)
Out[ ]:
<Axes: >
In [ ]:
df.plot.hist()
Out[ ]:
<Axes: ylabel='Frequency'>
In [ ]:
df.plot.area(stacked=False)
Out[ ]:
<Axes: >

India¶

In [ ]:
ind = pd.read_csv("IndiaCrime.csv")
ind.columns
Out[ ]:
Index(['Area_Name', 'Year', 'Group_Name', 'Sub_Group_Name',
       'Cases_Property_Recovered', 'Cases_Property_Stolen',
       'Value_of_Property_Recovered', 'Value_of_Property_Stolen'],
      dtype='object')
In [ ]:
ind.plot(x="Cases_Property_Recovered",y="Cases_Property_Stolen")
Out[ ]:
<Axes: xlabel='Cases_Property_Recovered'>
In [ ]:
ind.plot(x="Cases_Property_Recovered",y="Cases_Property_Stolen",kind="bar",figsize=(120,8))
Out[ ]:
<Axes: xlabel='Cases_Property_Recovered'>
In [ ]:
ind.plot(kind="bar",figsize=(30,30),subplots=True)
Out[ ]:
array([<Axes: title={'center': 'Year'}>,
       <Axes: title={'center': 'Cases_Property_Recovered'}>,
       <Axes: title={'center': 'Cases_Property_Stolen'}>,
       <Axes: title={'center': 'Value_of_Property_Recovered'}>,
       <Axes: title={'center': 'Value_of_Property_Stolen'}>], dtype=object)
In [ ]:
ind.plot(figsize=(8,8),subplots=True)
Out[ ]:
array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >], dtype=object)
In [ ]:
ind.plot.hist()
Out[ ]:
<Axes: ylabel='Frequency'>
In [ ]:
ind.plot.area(stacked=False)
Out[ ]:
<Axes: >
In [ ]:
ind.plot.area()
Out[ ]:
<Axes: >
In [ ]:
ind.plot(x="Cases_Property_Recovered",y="Cases_Property_Stolen",kind="scatter")
ind.plot(x="Cases_Property_Recovered",y="Cases_Property_Stolen",kind="pie",figsize=(30,30))
Out[ ]:
<Axes: ylabel='Cases_Property_Stolen'>
In [ ]:
ind.plot(kind ='barh',stacked=True)
Out[ ]:
<Axes: >
In [ ]: